In this document, we will give you several examples of Bayesian data analysis.
library(dplyr)
library(tibble)
library(purrr)
library(tidyr)
library(forcats)
library(gtools)
library(patchwork)
library(broom)
library(broom.mixed)
library(modelr)
library(brms)
library(tidybayes)
library(ggdist)
library(bayesplot)
library(ggplot2)
library(knitr)
BRM_BACKEND <- ifelse(require("cmdstanr"), 'cmdstanr', 'rstan')
The following dataset is from experiment 2 of “How Relevant are Incidental Power Poses for HCI?” (Jansen & Hornbæk, 2018). Study participants were asked to either make an expansive posture or a constrictive posture before performing a task. The experiment investigated whether posture could potentially have an effect on risk taking behavior.
First, we load the data.
pose_df = readr::read_csv("data/poses_data.csv", show_col_types = FALSE) %>%
mutate(condition) %>%
group_by(participant)
head(pose_df %>% select(participant, condition, change))
| participant | condition | change |
|---|---|---|
| 1 | expansive | 3.362832 |
| 2 | constrictive | 29.147982 |
| 3 | expansive | 25.409836 |
| 4 | constrictive | 54.069767 |
| 5 | expansive | -36.644592 |
| 6 | constrictive | 29.756098 |
Let’s plot out data
wrap_plots(
pose_df %>%
ggplot(aes(x = change, fill = condition)) +
geom_histogram(binwidth = 10) +
coord_cartesian(xlim = c(-50, 200)) +
scale_color_theme() +
theme_density_x,
pose_df %>%
ggplot(aes(x = change)) +
geom_point(aes(y = 0, color = condition), size = 2) +
coord_cartesian(xlim = c(-50, 200)) +
scale_color_theme() +
theme_density_x,
nrow = 2)
The data has been aggregated for each participant: -
condition = expansive indicates expansive posture, and
condition = constrictive indicates constrictive posture -
The dependent variable is change which indicates the
percentage change in risk-taking behavior. Thus, it is a continuous
variable.
For the purposes of this demo, we are only concerned with these two variables. We can ignore the other variables for now.
The Bayesian t-test (BEST) assumes that the data in the two conditions arises from two separate t-distributions. In the following section, we will describe the process for one of the conditions in the experiment.
We will use the \(Normal(\mu = 20, \sigma = 20)\) as the prior distribution.
First, we define some functions for manual calculation of the posterior normal distribution:
sigma_post = function(sigma_prior, sigma, n = 1) {
sqrt(1 / (1 / (sigma_prior^2) + n / (sigma^2)))
}
mu_post = function(mu_prior, sigma_prior, mu, sigma, n = 1) {
tau = sigma_post(sigma_prior, sigma, n)
(tau^2 / sigma_prior^2)*mu_prior + (n * tau^2 / sigma^2)*mu
}
d.p2 = tibble(
group = c("prior", "expansive", "constrictive", "posterior"),
mu = c(0, 32.82, 31.61, mu_post(0, 20, 32.82, 7.52)),
sd = c(20, 7.52, 7.06, sigma_post(20, 7.52))
) %>%
mutate(
cutoff_group = list(c(1:7)),
cutoff = list(c(0, 15, 25, 30, 32.82, 40, 100))
) %>%
unnest(c(cutoff_group, cutoff)) %>%
mutate(
cutoff = if_else(group == 'prior', 100, cutoff),
x = map(cutoff, ~seq(from = -100, ., by = 0.1)),
y = pmap(list(x, mu, sd), ~ dnorm(..1, ..2, ..3))
)
In the plot below, we show the raw data distribution for the two conditions:
p1 = pose_df %>%
ggplot() +
geom_point(aes(x = change, y = condition, colour = condition),
position = position_jitter(height = 0.1), alpha = 0.7) +
scale_color_theme() +
labs(y = "Condition") +
theme(
legend.position = "none",
axis.line.y = element_blank(),
axis.ticks.y = element_blank(),
axis.title.y = element_blank()
) +
scale_x_continuous(limits = c(-150, 250), breaks = seq(-150, 250, by = 50))
p2.blank = tibble(y = c("expansive", "constrictive"), x = 0) %>%
ggplot(aes(x, y)) +
scale_color_theme() +
theme_density
cowplot::plot_grid(p2.blank, p1, nrow = 2)
First, we define a function to help us plot different cutoff groups.
plot_preliminary <- function(data, group_num, group_name, linewidth_in = 1){
data %>%
filter(cutoff_group == group_num & group %in% group_name) %>%
unnest(c(x, y)) %>%
ggplot(aes(x, y)) +
#geom_line(aes(color = group), size = 1) +
# density
geom_area(aes(fill = group, color = group, alpha = as.character(group)), position = "identity", linewidth = linewidth_in) +
scale_x_continuous(limits = c(-150, 250)) +
scale_y_continuous(limits = c(0, 0.1)) +
scale_alpha_manual(
values = c(
'posterior' = .5,
'prior' = .3,
'expansive' = .3,
'constrictive' = .3
),
) +
scale_color_theme() +
theme_density +
theme(legend.position = 'none')
}
cowplot::plot_grid(
plot_preliminary(d.p2, 0, NULL),
p1, nrow = 2)
Next, we plot the prior density:
cowplot::plot_grid(
plot_preliminary(d.p2, 1, 'prior'),
p1, nrow = 2)
Then we describe step by step, how the likelihood is computed:
cowplot::plot_grid(
plot_preliminary(d.p2, 1, 'expansive'),
p1, nrow = 2)
cowplot::plot_grid(
plot_preliminary(d.p2, 2, 'expansive'),
p1, nrow = 2)
cowplot::plot_grid(
plot_preliminary(d.p2, 3, 'expansive'),
p1, nrow = 2)
cowplot::plot_grid(
plot_preliminary(d.p2, 5, 'expansive'),
p1, nrow = 2)
cowplot::plot_grid(
plot_preliminary(d.p2, 6, 'expansive'),
p1, nrow = 2)
cowplot::plot_grid(
plot_preliminary(d.p2, 7, 'expansive'),
p1, nrow = 2)
cowplot::plot_grid(
plot_preliminary(d.p2, 7, c('prior', 'expansive')),
p1, nrow = 2)
We want to compute the posterior, which is the product of the prior and likelihood:
cowplot::plot_grid(
plot_preliminary(d.p2, 7, c('prior', 'expansive'), linewidth_in = 0),
p1, nrow = 2)
cowplot::plot_grid(
plot_preliminary(d.p2, 7, c('posterior')),
p1, nrow = 2)
We plot the data distribution, the empirical density curve (blue line), and the theoretical density curve (black line).
pose_df %>%
mutate(c = as.factor(condition)) %>%
ggplot(aes(x = change)) +
# data distribution
geom_histogram(
aes(y = ..density..),
binwidth = 10,
fill = theme_yellow,
alpha = .75,
color = 'white'
) +
# empirical density curve
geom_density(size = 1,
adjust = 1.5,
color = theme_blue) +
# theoretical density curve, a t distribition with mean = 16, sd = 19, and nu = 6
geom_function(
color = "#222222",
linetype = 'dashed',
fun = function(x)
dstudent_t(x, mu = 16, sigma = 39, df = 6),
size = 1
) +
scale_x_continuous(limits = c(-200, 200)) +
theme(
axis.line.y = element_blank(),
axis.text.y = element_blank(),
axis.ticks.y = element_blank(),
axis.title.y = element_blank(),
axis.text.x = element_text(size = 20),
axis.title.x = element_text(size = 24)
)
Here we use a mathematical expression for the model above.
\[ \begin{align} y_{i} &\sim \mathrm{Student\_t}(\mu, \sigma_{0}, \nu_{0}) \\ \mu &= \beta_{0} + \beta_{1} * x_i \\ \sigma_{0} &\sim \mathrm{HalfNormal}(0, 10) \\ \beta_{0} &\sim \mathrm{?} \\ \beta_{1} &\sim \mathrm{Normal}(0,2) \\ \nu_{0} &\sim \mathrm{?} \\ i & \in \{\mathrm{expansive}, \mathrm{constrictive}\} \end{align} \]
We translate this thought into brms formula using the
function bf.
model.1.formula <- bf(
# we think change is affected by different conditions.
change ~ condition,
# to tell brms which response distribution to use
family = student()
)
Now we can use get_prior() to inspect the available
priors and formula. As what we learned, we have priors for
tibble(get_prior(model.1.formula, pose_df))
| prior | class | coef | group | resp | dpar | nlpar | lb | ub | source |
|---|---|---|---|---|---|---|---|---|---|
| b | default | ||||||||
| b | conditionexpansive | default | |||||||
| student_t(3, 22.6, 38.8) | Intercept | default | |||||||
| gamma(2, 0.1) | nu | 1 | default | ||||||
| student_t(3, 0, 38.8) | sigma | 0 | default |
We look at the default priors from brms.
cowplot::plot_grid(
tibble(x = qstudent_t(ppoints(n = 500), df = 3, mu = 22.6, sigma = 38.8)) %>%
ggplot() +
geom_density(aes(x = x), fill = theme_yellow, color = NA) +
ggtitle('student_t(3, 22.6, 38.8) for Intercept') +
coord_cartesian(xlim = c(-300, 300),expand = c(0)),
tibble(x = qgamma(ppoints(n = 500), shape = 2, rate = .1)) %>%
ggplot() +
geom_density(aes(x = x), fill = theme_yellow, color = NA) +
ggtitle('Gamma(2, 0.1) for nu') +
coord_cartesian(xlim = c(1, 100), expand = 0),
tibble(x = qstudent_t(ppoints(n = 500), df = 3, mu = 0, sigma = 38.8)) %>%
ggplot() +
geom_density(aes(x = x), fill = theme_yellow, color = NA) +
ggtitle('student_t(3, 0, 38.8) for sigma') +
coord_cartesian(xlim = c(0, 400),expand = c(0)),
ncol = 3)
We do a prior check for default priors to see the range of prior predictions.
model.1.checks_default <- brm(
model.1.formula,
data = pose_df,
family = student_t(),
prior = c(
# need proper priors for prior predictive checks
prior(normal(0, 3), class = 'b')
),
# to allow draw from prior distributions
sample_prior = 'only',
backend = BRM_BACKEND,
# save the model
file = 'rds/model.1.checks_default.rds',
file_refit = 'on_change'
)
We draw from prior distributions using
predicted_draws.
model.1.defaultpriorsamples <-
model.1.checks_default %>%
predicted_draws(tibble(condition = c('expansive', 'constrictive')))
Take a look at the prior prediction draws. You can ignore
.row,.chain, ‘.iteration’. The
.draw is the ID for each draw. .prediction is
the value we care about.
head(model.1.defaultpriorsamples)
| condition | .row | .chain | .iteration | .draw | .prediction |
|---|---|---|---|---|---|
| expansive | 1 | NA | NA | 1 | 71.188604 |
| expansive | 1 | NA | NA | 2 | 155.807150 |
| expansive | 1 | NA | NA | 3 | 9.000583 |
| expansive | 1 | NA | NA | 4 | 75.183570 |
| expansive | 1 | NA | NA | 5 | 18.108887 |
| expansive | 1 | NA | NA | 6 | -48.676288 |
We plot out the prediction draws. They look reasonable but have a wide range. We would expect so given the ranges of the priors.
model.1.defaultpriorsamples %>%
ggplot(aes(x = .prediction, group = condition)) +
geom_density(alpha = .5, color = NA, adjust = 2, fill = theme_yellow) +
theme_density_x +
ggtitle('Checks default priors of model.1')
We want to use narrow priors.
cowplot::plot_grid(
tibble(x = qstudent_t(ppoints(n = 1000), mu = 22.6, sigma = 10, df = 3)) %>%
ggplot() +
geom_density(aes(x = x), fill = theme_yellow, color = NA) +
ggtitle('t(3, 22.6, 10) for Intercept') +
coord_cartesian(xlim = c(-30, 80), expand = 0),
tibble(x = qnorm(ppoints(n = 1000), mean = 0, sd = 2)) %>%
ggplot() +
geom_density(aes(x = x), fill = theme_yellow, color = NA) +
ggtitle('Normal(0, 2) for b') +
coord_cartesian(xlim = c(-8, 8), expand = 0),
tibble(x = qnorm(ppoints(n = 1000), mean = 0, sd = 10)) %>%
ggplot() +
geom_density(aes(x = x), fill = theme_yellow, color = NA) +
ggtitle('HalfNormal(0, 10) for sigma') +
coord_cartesian(xlim = c(0, 50), expand = c(0)),
ncol = 3)
We can do another prior check.
model.1.checks <- brm(
model.1.formula,
data = pose_df,
family = student_t(),
prior = c(
prior(student_t(3, 22.6, 10), class = "Intercept"),
prior(normal(0, 2), class = 'b'),
prior(normal(0, 10), class = 'sigma', lb = 0)
),
sample_prior = 'only',
backend = BRM_BACKEND,
file = 'rds/model.1.checks.rds',
file_refit = 'on_change'
)
We draw from the new prior distribution.
model.1.priorsamples <-
model.1.checks %>%
predicted_draws(tibble(condition = c('expansive', 'constrictive')))
We compare two sets of prior distributions. Visually, using our priors have a narrower range.
cowplot::plot_grid(
model.1.defaultpriorsamples %>%
ggplot(aes(x = .prediction, group = condition)) +
geom_density(alpha = .5, color = NA, adjust = 2, fill = theme_yellow) +
theme_density_x +
# coord_cartesian(xlim = c(-800, 1000)) +
ggtitle('Checks for default priors of model.1')
,
model.1.priorsamples %>%
ggplot(aes(x = .prediction, group = condition)) +
geom_density(alpha = .5, color = NA, adjust = 2, fill = theme_yellow) +
theme_density_x +
# coord_cartesian(xlim = c(-800, 1000)) +
ggtitle('Checks for our priors of model.1')
,
ncol = 1)
Now we can fit the model. This model takes a
model.1 <- brm(
model.1.formula,
data = pose_df,
family = student_t(),
prior = c(
prior(normal(0, 2), class = 'b'),
prior(normal(0, 10), class = 'sigma', lb = 0)
),
backend = BRM_BACKEND,
file = 'rds/model.1.rds'
)
First, we check the MCMC traces to ensure that the chains are mixed well.
color_scheme_set("teal")
mcmc_trace(model.1, facet_args = list(ncol = 4))
plot(model.1)
We also check Rhat and ESS. We want Rhat to be close to 1 to ensure model convergence. We want Bulk ESS (effective sample size) to be at least a few hundreds (ideally, this should be at least a thousand.) to ensure reliable estimate of mean. We also want Tail ESS to be at this level. If ESSs are too low, it means there are too many correlations in posteriors draws. You need to increase the number of iterations and perhaps the number of chains.
summary(model.1)
## Family: student
## Links: mu = identity; sigma = identity; nu = identity
## Formula: change ~ condition
## Data: pose_df (Number of observations: 80)
## Draws: 4 chains, each with iter = 2000; warmup = 1000; thin = 1;
## total post-warmup draws = 4000
##
## Population-Level Effects:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## Intercept 22.84 4.49 14.50 31.77 1.00 2686 2092
## conditionexpansive -0.26 1.97 -4.10 3.62 1.00 2726 2987
##
## Family Specific Parameters:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sigma 28.35 4.13 20.76 36.92 1.00 2180 2520
## nu 3.68 2.35 1.57 9.04 1.00 2172 2158
##
## Draws were sampled using sample(hmc). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
pose_df.bayesiant.y <- pose_df$change
pose_df.bayesiant.yrep <- posterior_predict(model.1, ndraws = 30, seed = 1234)
ppc_dens_overlay(pose_df.bayesiant.y, pose_df.bayesiant.yrep, linewidth = 2)
Now we check the posterior prediction of this model to ensure that it generates reasonable predictions.
model.1.predictions <-
model.1 %>%
predicted_draws(tibble(condition = c('expansive', 'constrictive')))
head(model.1.predictions)
| condition | .row | .chain | .iteration | .draw | .prediction |
|---|---|---|---|---|---|
| expansive | 1 | NA | NA | 1 | 22.785243 |
| expansive | 1 | NA | NA | 2 | -38.544134 |
| expansive | 1 | NA | NA | 3 | -2.368428 |
| expansive | 1 | NA | NA | 4 | 23.209345 |
| expansive | 1 | NA | NA | 5 | -0.189108 |
| expansive | 1 | NA | NA | 6 | 33.318285 |
plot_predictions <- function(model, df = NULL, title = ''){
if(is.null(df))
df = tibble(condition = c('expansive', 'constrictive'),
participant = c(-1, -1))
model %>%
predicted_draws(df,
seed = 1234,
ndraws = NULL,
allow_new_levels = TRUE,
sample_new_levels = 'uncertainty') %>%
ggplot(aes(x = .prediction, colour = condition), fill = NA) +
geom_density(alpha = .5, size = 1, adjust = 2) +
scale_color_theme() +
theme_density_x +
scale_y_continuous(breaks = 0, labels = 'constrictive') +
scale_x_continuous(breaks = seq(-150, 250, by = 50)) +
coord_cartesian(xlim = c(-150, 250)) +
ggtitle(paste0('Posterior predictions of ', deparse(substitute(model))))
}
cowplot::plot_grid(plot_predictions(model.1),
p1 +
scale_x_continuous(breaks = seq(-150, 250, by = 100)) +
coord_cartesian(xlim = c(-150, 250)),
nrow = 2)
We now generate the posterior predictions of means, which are of interests here.
model.1.posteriors <-
model.1 %>%
epred_draws(tibble(condition = c('expansive', 'constrictive')))
plot_posteriors <- function(model, df = NULL, title = ''){
if(is.null(df))
df = tibble(condition = c('expansive', 'constrictive'))
model %>%
epred_draws(df,
# ignoring random effects if there is any
#seed = 1234,
ndraws = NULL,
re_formula = NA) %>%
ggplot(aes(x = .epred, fill = condition)) +
geom_density(alpha = .5, size = 1, adjust = 2, color = NA) +
scale_color_theme() +
theme_density_x +
scale_y_continuous(breaks = 0, labels = 'constrictive') +
scale_x_continuous(limits = c(-150, 250), breaks = seq(-150, 250, by = 50)) +
ggtitle(paste0('Posterior means of ', deparse(substitute(model))))
}
cowplot::plot_grid(plot_posteriors(model.1), p1, nrow = 2)
This model is the BEST test model as described by Kruschke in the paper Bayesian estimation supersedes the t-test. In this model, \(\beta\) indicates the mean difference in the outcome variable between the two groups (in this case, the percent change in the BART scores). We fit different priors on \(\beta\) and set different weights on these priors to obtain our posterior estimate.
\[ \begin{align} y_{i} &\sim \mathrm{T}(\nu, \mu, \sigma) \\ \mu &= \beta_{0} + \beta_{1} * x_i \\ \sigma &= \sigma_{a} + \sigma_{b}*x_i \\ \beta_{1} &\sim \mathrm{Normal}(\mu_{0}, \sigma_{0}) \\ \sigma_a, \sigma_b &\sim \mathrm{Cauchy}(0, 2) \\ \nu &\sim \mathrm{exp}(1/30)\\ i & \in \{\mathrm{expansive}, \mathrm{constrictive}\} \end{align} \]
model.2.formula <- bf(# we think change is affected by different conditions.
change ~ condition,
sigma ~ condition,
# to tell brms which response distribution to use
family = student())
tibble(get_prior(model.2.formula, pose_df))
| prior | class | coef | group | resp | dpar | nlpar | lb | ub | source |
|---|---|---|---|---|---|---|---|---|---|
| b | default | ||||||||
| b | conditionexpansive | default | |||||||
| student_t(3, 22.6, 38.8) | Intercept | default | |||||||
| gamma(2, 0.1) | nu | 1 | default | ||||||
| b | sigma | default | |||||||
| b | conditionexpansive | sigma | default | ||||||
| student_t(3, 0, 2.5) | Intercept | sigma | default |
cowplot::plot_grid(
tibble(x = qnorm(ppoints(n = 1000), mean = 0, sd = 2)) %>%
ggplot() +
geom_density(aes(x = x), fill = theme_yellow, color = NA) +
ggtitle('Normal(0 ,2) for Intercept') +
coord_cartesian(expand = c(0)),
tibble(x = qexp(ppoints(n = 1000), rate = 0.0333)) %>%
ggplot() +
geom_density(aes(x = x), fill = theme_yellow, color = NA) +
ggtitle('exponential(0.0333) for nu') +
coord_cartesian(expand = 0),
tibble(x = qcauchy(ppoints(n = 1000), location = 0, scale = 2)) %>%
ggplot() +
geom_density(aes(x = x), fill = theme_yellow, color = NA) +
ggtitle('cauchy(0, 2) for sigma') +
coord_cartesian(expand = c(0)),
ncol = 3)
model.2.priorchecks <- brm(
model.2.formula,
data = pose_df,
family = student_t(),
prior = c(
prior(normal(0, 2), class = 'b'),
prior(cauchy(0, 2), class = 'b', dpar = 'sigma'),
prior(exponential(0.0333), class = 'nu')
),
sample_prior = 'only',
backend = BRM_BACKEND,
file = 'rds/model.2.priorchecks.rds',
file_refit = 'on_change'
)
model.2.priorchecks %>%
epred_draws(tibble(condition = c('expansive', 'constrictive'))) %>%
ggplot(aes(x = .epred, group = condition)) +
geom_density(alpha = .5, color = NA, adjust = 2, fill = theme_yellow) +
theme_density_x +
ggtitle('Checks priors of model.2')
model.2 <- brm(
model.2.formula,
data = pose_df,
family = student_t(),
prior = c(
prior(normal(0, 2), class = 'b'),
prior(cauchy(0, 2), class = 'b', dpar = 'sigma'),
prior(exponential(0.0333), class = 'nu')
),
backend = BRM_BACKEND,
file = 'rds/model.2.rds',
file_refit = 'on_change'
)
summary(model.2)
## Family: student
## Links: mu = identity; sigma = log; nu = identity
## Formula: change ~ condition
## sigma ~ condition
## Data: pose_df (Number of observations: 80)
## Draws: 4 chains, each with iter = 2000; warmup = 1000; thin = 1;
## total post-warmup draws = 4000
##
## Population-Level Effects:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS
## Intercept 24.52 4.88 15.67 34.84 1.00 2333
## sigma_Intercept 3.41 0.20 3.00 3.79 1.00 2215
## conditionexpansive -0.09 1.96 -3.93 3.64 1.00 3639
## sigma_conditionexpansive 0.15 0.21 -0.27 0.57 1.00 3051
## Tail_ESS
## Intercept 2276
## sigma_Intercept 2420
## conditionexpansive 2377
## sigma_conditionexpansive 2668
##
## Family Specific Parameters:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## nu 6.03 8.24 1.76 24.62 1.00 1860 1782
##
## Draws were sampled using sample(hmc). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
wrap_plots(plot_predictions(model.2) + scale_x_continuous(limits = c(-200,250), expand = c(0,0), breaks = seq(-150, 250, by = 50))
,
p1 + scale_x_continuous(limits = c(-200,250), expand = c(0,0), breaks = seq(-150, 250, by = 50)) ,
nrow = 2)
wrap_plots(
plot_posteriors(model.2),
p1,
nrow = 2)
model.2.posteriors <- model.2 %>%
epred_draws(tibble(condition = c('expansive', 'constrictive')),
#seed = 1234,
ndraws = NULL,
re_formula = NA)
\[ \begin{align} y_{i} &\sim \mathrm{T}(\nu, \mu, \sigma) \\ \mu &= \beta_{i,j} + \beta_{1} * x_i \\ \sigma &= \sigma_{a} + \sigma_{b}*x_i \\ \beta_{1} &\sim \mathrm{Normal}(0, 2) \\ \sigma_a, \sigma_b &\sim \mathrm{HalfNormal}(0, 10) \\ \nu &\sim \mathrm{exp}(1/30)\\ i & \in \{\mathrm{expansive}, \mathrm{constrictive}\}\\ j & \in \{1, ..., \mathrm{N}\} \end{align} \]
model.3.formula <- bf(# we think change is affected by different conditions.
change ~ condition + (1|participant),
sigma ~ condition,
# to tell brms which response distribution to use
family = student())
tibble(get_prior(model.3.formula, pose_df))
| prior | class | coef | group | resp | dpar | nlpar | lb | ub | source |
|---|---|---|---|---|---|---|---|---|---|
| b | default | ||||||||
| b | conditionexpansive | default | |||||||
| student_t(3, 22.6, 38.8) | Intercept | default | |||||||
| gamma(2, 0.1) | nu | 1 | default | ||||||
| student_t(3, 0, 38.8) | sd | 0 | default | ||||||
| sd | participant | default | |||||||
| sd | Intercept | participant | default | ||||||
| b | sigma | default | |||||||
| b | conditionexpansive | sigma | default | ||||||
| student_t(3, 0, 2.5) | Intercept | sigma | default |
model.3 <- brm(
model.3.formula,
data = pose_df,
family = student_t(),
prior = c(
prior(normal(0, 2), class = 'b'),
prior(normal(0, 2), class = 'b', dpar = 'sigma'),
prior(normal(0, 2), class = 'sd', group = 'participant', lb = 0),
prior(exponential(0.0333), class = 'nu')
),
#backend = BRM_BACKEND,
file = 'rds/model.3.rds',
file_refit = 'on_change'
)
summary(model.3)
## Family: student
## Links: mu = identity; sigma = log; nu = identity
## Formula: change ~ condition + (1 | participant)
## sigma ~ condition
## Data: pose_df (Number of observations: 80)
## Draws: 4 chains, each with iter = 2000; warmup = 1000; thin = 1;
## total post-warmup draws = 4000
##
## Group-Level Effects:
## ~participant (Number of levels: 80)
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept) 1.63 1.23 0.06 4.57 1.00 3650 2196
##
## Population-Level Effects:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS
## Intercept 24.48 4.77 15.62 34.43 1.00 5853
## sigma_Intercept 3.40 0.20 3.01 3.79 1.00 5493
## conditionexpansive -0.09 1.92 -3.77 3.73 1.00 12370
## sigma_conditionexpansive 0.15 0.21 -0.29 0.58 1.00 9195
## Tail_ESS
## Intercept 2933
## sigma_Intercept 2867
## conditionexpansive 2502
## sigma_conditionexpansive 2684
##
## Family Specific Parameters:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## nu 6.09 8.76 1.78 24.32 1.00 4880 2600
##
## Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
cowplot::plot_grid(plot_predictions(model.3), p1, nrow = 2)
cowplot::plot_grid(plot_posteriors(model.3), p1, nrow = 2)
model.3.posteriors <-
model.3 %>%
epred_draws(tibble(condition = c('expansive', 'constrictive')),
re_formula = NA)
wrap_plots(
model.3.posteriors %>%
mutate(model = 'model 3') %>%
rbind(
model.2.posteriors %>%
mutate(model = 'model 2')) %>%
rbind(
model.1.posteriors %>%
mutate(model = 'model 1')) %>%
ggplot() +
geom_density(aes(x = .epred, fill = condition), adjust = 1.5, color = NA, alpha = .5) +
facet_grid(model ~ .) +
scale_x_continuous(limits = c(-150, 250), breaks = seq(-150, 250, by = 50)) +
scale_color_theme() +
theme_density_x +
ggtitle('Compare the means of all three models'),
p1, nrow = 2, heights = c(4,1.5))
pose_raw_df = read.csv("data/posture_data-raw.csv") %>%
mutate(participant = factor(participant)) %>%
rename(trial = trial.number)
head(pose_raw_df)
| participant | condition | total.money | trial | trial.money | exploded | pumps | life |
|---|---|---|---|---|---|---|---|
| 1 | expansive | 910 | 0 | 62 | 0 | 62 | 72 |
| 1 | expansive | 910 | 1 | 0 | 1 | 27 | 27 |
| 1 | expansive | 910 | 2 | 47 | 0 | 47 | 98 |
| 1 | expansive | 910 | 3 | 0 | 1 | 86 | 86 |
| 1 | expansive | 910 | 4 | 60 | 0 | 60 | 104 |
| 1 | expansive | 910 | 5 | 0 | 1 | 26 | 26 |
tibble(x = seq(1, 160, by = 1)) %>%
ggplot(aes(x)) +
geom_function(
color = theme_blue,
linetype = 'dashed',
fun = function(x) # need to fix
dnbinom(round(x), size = 5, mu = 64),
size = 1
) +
# stat_function(geom = "area", fill = theme_red, fun = function(x) dnbinom(round(x), size = 5, mu = 64), xlim = c(128, 160), alpha = 0.5) +
geom_vline(xintercept = 0, color = "red", size = 1, alpha = 0.5) +
geom_vline(xintercept = 128, color = "red", size = 1, alpha = 0.5) +
coord_cartesian(xlim = c(0, 160)) +
theme(
axis.line.y = element_blank(),
axis.text.y = element_blank(),
axis.ticks.y = element_blank(),
axis.title.y = element_blank(),
axis.text.x = element_text(size = 16),
axis.title.x = element_blank()
)
pose_raw_df %>%
mutate(c = as.factor(condition)) %>%
ggplot(aes(x = pumps)) +
geom_histogram(
aes(y = ..density..),
binwidth = 2,
fill = theme_yellow,
alpha = .5,
color = 'white'
) +
geom_density(size = 1,
adjust = 3,
color = theme_blue) +
geom_function(
color = "#222222",
linetype = 'dashed',
fun = function(x) # need to fix
dnbinom(round(x), size = 3, mu = 40), # this does not use the log-link but brm does so the prior below is # log(387)
size = 1
) +
theme(
axis.line.y = element_blank(),
axis.text.y = element_blank(),
axis.ticks.y = element_blank(),
axis.title.y = element_blank()
)
\[ \begin{align} y_{i} &\sim \mathrm{Neg-Binomial}(\mu, \phi) \\ log(\mu) &= \beta_{0} + \beta_{1} . x_i \\ \beta_{0} &\sim \mathrm{Normal}(4.1, 0.5) \\ \beta_{0, j} &\sim \mathrm{Student\_t}(3, 0, 1) \\ \beta_{1} &\sim \mathrm{Normal}(0, 0.5) \\ \phi &\sim \mathrm{Gamma}(5, 1) \\ i & \in \{\mathrm{expansive}, \mathrm{constrictive}\}\\ \end{align} \]
model.4.prior_pred = brm(pumps ~ 1 + condition,
data = pose_raw_df, family = negbinomial(),
prior = c(prior(normal(4.1, 0.5), class = Intercept),
prior(normal(0, 0.5), class = b),
prior(gamma(5, 1), class = shape)
),
backend = BRM_BACKEND,
file = 'rds/model.4.prior_predictive.rds',
file_refit = 'on_change' ,
sample_prior = "only",
iter = 4000, warmup = 1000, cores = 4, chains = 4)
model.4.prior_pred.samples <- model.4.prior_pred %>%
predicted_draws(tibble(condition = c('expansive', 'constrictive')), re_formula = NA)
head(model.4.prior_pred.samples)
| condition | .row | .chain | .iteration | .draw | .prediction |
|---|---|---|---|---|---|
| expansive | 1 | NA | NA | 1 | 99 |
| expansive | 1 | NA | NA | 2 | 113 |
| expansive | 1 | NA | NA | 3 | 19 |
| expansive | 1 | NA | NA | 4 | 66 |
| expansive | 1 | NA | NA | 5 | 30 |
| expansive | 1 | NA | NA | 6 | 56 |
ggplot() +
geom_histogram(
data = pose_raw_df,
mapping = aes(x = pumps, y = ..density..),
binwidth = 10,
alpha = 0.5,
fill = theme_yellow,
color = 'white'
) +
geom_density(model.4.prior_pred.samples,
mapping = aes(x = .prediction, y = ..density..), adjust = 1, stroke = theme_blue, size = 1) +
theme_density_x +
coord_cartesian(xlim = c(0, 150)) +
theme(
axis.text.x = element_text(size = 16)
)
model.4 = brm(pumps ~ 1 + condition,
data = pose_raw_df, family = negbinomial(),
prior = c(prior(normal(4.1, 0.5), class = Intercept),
prior(normal(0, 0.5), class = b),
prior(gamma(5, 1), class = shape)
),
backend = BRM_BACKEND,
file = 'rds/model.4.rds',
file_refit = 'on_change' ,
iter = 4000, warmup = 1000, cores = 4, chains = 4)
## Running MCMC with 4 parallel chains...
##
## Chain 1 Iteration: 1 / 4000 [ 0%] (Warmup)
## Chain 1 Iteration: 100 / 4000 [ 2%] (Warmup)
## Chain 2 Iteration: 1 / 4000 [ 0%] (Warmup)
## Chain 2 Iteration: 100 / 4000 [ 2%] (Warmup)
## Chain 3 Iteration: 1 / 4000 [ 0%] (Warmup)
## Chain 3 Iteration: 100 / 4000 [ 2%] (Warmup)
## Chain 4 Iteration: 1 / 4000 [ 0%] (Warmup)
## Chain 4 Iteration: 100 / 4000 [ 2%] (Warmup)
## Chain 1 Iteration: 200 / 4000 [ 5%] (Warmup)
## Chain 1 Iteration: 300 / 4000 [ 7%] (Warmup)
## Chain 1 Iteration: 400 / 4000 [ 10%] (Warmup)
## Chain 2 Iteration: 200 / 4000 [ 5%] (Warmup)
## Chain 2 Iteration: 300 / 4000 [ 7%] (Warmup)
## Chain 2 Iteration: 400 / 4000 [ 10%] (Warmup)
## Chain 3 Iteration: 200 / 4000 [ 5%] (Warmup)
## Chain 3 Iteration: 300 / 4000 [ 7%] (Warmup)
## Chain 4 Iteration: 200 / 4000 [ 5%] (Warmup)
## Chain 4 Iteration: 300 / 4000 [ 7%] (Warmup)
## Chain 1 Iteration: 500 / 4000 [ 12%] (Warmup)
## Chain 1 Iteration: 600 / 4000 [ 15%] (Warmup)
## Chain 2 Iteration: 500 / 4000 [ 12%] (Warmup)
## Chain 2 Iteration: 600 / 4000 [ 15%] (Warmup)
## Chain 3 Iteration: 400 / 4000 [ 10%] (Warmup)
## Chain 3 Iteration: 500 / 4000 [ 12%] (Warmup)
## Chain 4 Iteration: 400 / 4000 [ 10%] (Warmup)
## Chain 4 Iteration: 500 / 4000 [ 12%] (Warmup)
## Chain 1 Iteration: 700 / 4000 [ 17%] (Warmup)
## Chain 1 Iteration: 800 / 4000 [ 20%] (Warmup)
## Chain 2 Iteration: 700 / 4000 [ 17%] (Warmup)
## Chain 2 Iteration: 800 / 4000 [ 20%] (Warmup)
## Chain 3 Iteration: 600 / 4000 [ 15%] (Warmup)
## Chain 3 Iteration: 700 / 4000 [ 17%] (Warmup)
## Chain 3 Iteration: 800 / 4000 [ 20%] (Warmup)
## Chain 4 Iteration: 600 / 4000 [ 15%] (Warmup)
## Chain 4 Iteration: 700 / 4000 [ 17%] (Warmup)
## Chain 1 Iteration: 900 / 4000 [ 22%] (Warmup)
## Chain 1 Iteration: 1000 / 4000 [ 25%] (Warmup)
## Chain 1 Iteration: 1001 / 4000 [ 25%] (Sampling)
## Chain 2 Iteration: 900 / 4000 [ 22%] (Warmup)
## Chain 2 Iteration: 1000 / 4000 [ 25%] (Warmup)
## Chain 2 Iteration: 1001 / 4000 [ 25%] (Sampling)
## Chain 3 Iteration: 900 / 4000 [ 22%] (Warmup)
## Chain 3 Iteration: 1000 / 4000 [ 25%] (Warmup)
## Chain 3 Iteration: 1001 / 4000 [ 25%] (Sampling)
## Chain 4 Iteration: 800 / 4000 [ 20%] (Warmup)
## Chain 4 Iteration: 900 / 4000 [ 22%] (Warmup)
## Chain 1 Iteration: 1100 / 4000 [ 27%] (Sampling)
## Chain 1 Iteration: 1200 / 4000 [ 30%] (Sampling)
## Chain 2 Iteration: 1100 / 4000 [ 27%] (Sampling)
## Chain 2 Iteration: 1200 / 4000 [ 30%] (Sampling)
## Chain 3 Iteration: 1100 / 4000 [ 27%] (Sampling)
## Chain 3 Iteration: 1200 / 4000 [ 30%] (Sampling)
## Chain 4 Iteration: 1000 / 4000 [ 25%] (Warmup)
## Chain 4 Iteration: 1001 / 4000 [ 25%] (Sampling)
## Chain 4 Iteration: 1100 / 4000 [ 27%] (Sampling)
## Chain 1 Iteration: 1300 / 4000 [ 32%] (Sampling)
## Chain 1 Iteration: 1400 / 4000 [ 35%] (Sampling)
## Chain 2 Iteration: 1300 / 4000 [ 32%] (Sampling)
## Chain 2 Iteration: 1400 / 4000 [ 35%] (Sampling)
## Chain 2 Iteration: 1500 / 4000 [ 37%] (Sampling)
## Chain 3 Iteration: 1300 / 4000 [ 32%] (Sampling)
## Chain 4 Iteration: 1200 / 4000 [ 30%] (Sampling)
## Chain 4 Iteration: 1300 / 4000 [ 32%] (Sampling)
## Chain 1 Iteration: 1500 / 4000 [ 37%] (Sampling)
## Chain 1 Iteration: 1600 / 4000 [ 40%] (Sampling)
## Chain 2 Iteration: 1600 / 4000 [ 40%] (Sampling)
## Chain 2 Iteration: 1700 / 4000 [ 42%] (Sampling)
## Chain 3 Iteration: 1400 / 4000 [ 35%] (Sampling)
## Chain 3 Iteration: 1500 / 4000 [ 37%] (Sampling)
## Chain 4 Iteration: 1400 / 4000 [ 35%] (Sampling)
## Chain 4 Iteration: 1500 / 4000 [ 37%] (Sampling)
## Chain 1 Iteration: 1700 / 4000 [ 42%] (Sampling)
## Chain 1 Iteration: 1800 / 4000 [ 45%] (Sampling)
## Chain 2 Iteration: 1800 / 4000 [ 45%] (Sampling)
## Chain 2 Iteration: 1900 / 4000 [ 47%] (Sampling)
## Chain 3 Iteration: 1600 / 4000 [ 40%] (Sampling)
## Chain 3 Iteration: 1700 / 4000 [ 42%] (Sampling)
## Chain 4 Iteration: 1600 / 4000 [ 40%] (Sampling)
## Chain 4 Iteration: 1700 / 4000 [ 42%] (Sampling)
## Chain 1 Iteration: 1900 / 4000 [ 47%] (Sampling)
## Chain 2 Iteration: 2000 / 4000 [ 50%] (Sampling)
## Chain 2 Iteration: 2100 / 4000 [ 52%] (Sampling)
## Chain 3 Iteration: 1800 / 4000 [ 45%] (Sampling)
## Chain 3 Iteration: 1900 / 4000 [ 47%] (Sampling)
## Chain 4 Iteration: 1800 / 4000 [ 45%] (Sampling)
## Chain 4 Iteration: 1900 / 4000 [ 47%] (Sampling)
## Chain 1 Iteration: 2000 / 4000 [ 50%] (Sampling)
## Chain 1 Iteration: 2100 / 4000 [ 52%] (Sampling)
## Chain 2 Iteration: 2200 / 4000 [ 55%] (Sampling)
## Chain 2 Iteration: 2300 / 4000 [ 57%] (Sampling)
## Chain 3 Iteration: 2000 / 4000 [ 50%] (Sampling)
## Chain 3 Iteration: 2100 / 4000 [ 52%] (Sampling)
## Chain 4 Iteration: 2000 / 4000 [ 50%] (Sampling)
## Chain 4 Iteration: 2100 / 4000 [ 52%] (Sampling)
## Chain 1 Iteration: 2200 / 4000 [ 55%] (Sampling)
## Chain 1 Iteration: 2300 / 4000 [ 57%] (Sampling)
## Chain 2 Iteration: 2400 / 4000 [ 60%] (Sampling)
## Chain 2 Iteration: 2500 / 4000 [ 62%] (Sampling)
## Chain 3 Iteration: 2200 / 4000 [ 55%] (Sampling)
## Chain 4 Iteration: 2200 / 4000 [ 55%] (Sampling)
## Chain 1 Iteration: 2400 / 4000 [ 60%] (Sampling)
## Chain 1 Iteration: 2500 / 4000 [ 62%] (Sampling)
## Chain 2 Iteration: 2600 / 4000 [ 65%] (Sampling)
## Chain 2 Iteration: 2700 / 4000 [ 67%] (Sampling)
## Chain 3 Iteration: 2300 / 4000 [ 57%] (Sampling)
## Chain 3 Iteration: 2400 / 4000 [ 60%] (Sampling)
## Chain 4 Iteration: 2300 / 4000 [ 57%] (Sampling)
## Chain 4 Iteration: 2400 / 4000 [ 60%] (Sampling)
## Chain 1 Iteration: 2600 / 4000 [ 65%] (Sampling)
## Chain 1 Iteration: 2700 / 4000 [ 67%] (Sampling)
## Chain 2 Iteration: 2800 / 4000 [ 70%] (Sampling)
## Chain 2 Iteration: 2900 / 4000 [ 72%] (Sampling)
## Chain 3 Iteration: 2500 / 4000 [ 62%] (Sampling)
## Chain 3 Iteration: 2600 / 4000 [ 65%] (Sampling)
## Chain 4 Iteration: 2500 / 4000 [ 62%] (Sampling)
## Chain 4 Iteration: 2600 / 4000 [ 65%] (Sampling)
## Chain 1 Iteration: 2800 / 4000 [ 70%] (Sampling)
## Chain 1 Iteration: 2900 / 4000 [ 72%] (Sampling)
## Chain 2 Iteration: 3000 / 4000 [ 75%] (Sampling)
## Chain 2 Iteration: 3100 / 4000 [ 77%] (Sampling)
## Chain 3 Iteration: 2700 / 4000 [ 67%] (Sampling)
## Chain 3 Iteration: 2800 / 4000 [ 70%] (Sampling)
## Chain 4 Iteration: 2700 / 4000 [ 67%] (Sampling)
## Chain 4 Iteration: 2800 / 4000 [ 70%] (Sampling)
## Chain 1 Iteration: 3000 / 4000 [ 75%] (Sampling)
## Chain 1 Iteration: 3100 / 4000 [ 77%] (Sampling)
## Chain 2 Iteration: 3200 / 4000 [ 80%] (Sampling)
## Chain 2 Iteration: 3300 / 4000 [ 82%] (Sampling)
## Chain 2 Iteration: 3400 / 4000 [ 85%] (Sampling)
## Chain 3 Iteration: 2900 / 4000 [ 72%] (Sampling)
## Chain 4 Iteration: 2900 / 4000 [ 72%] (Sampling)
## Chain 4 Iteration: 3000 / 4000 [ 75%] (Sampling)
## Chain 1 Iteration: 3200 / 4000 [ 80%] (Sampling)
## Chain 2 Iteration: 3500 / 4000 [ 87%] (Sampling)
## Chain 2 Iteration: 3600 / 4000 [ 90%] (Sampling)
## Chain 3 Iteration: 3000 / 4000 [ 75%] (Sampling)
## Chain 3 Iteration: 3100 / 4000 [ 77%] (Sampling)
## Chain 4 Iteration: 3100 / 4000 [ 77%] (Sampling)
## Chain 1 Iteration: 3300 / 4000 [ 82%] (Sampling)
## Chain 1 Iteration: 3400 / 4000 [ 85%] (Sampling)
## Chain 2 Iteration: 3700 / 4000 [ 92%] (Sampling)
## Chain 2 Iteration: 3800 / 4000 [ 95%] (Sampling)
## Chain 3 Iteration: 3200 / 4000 [ 80%] (Sampling)
## Chain 3 Iteration: 3300 / 4000 [ 82%] (Sampling)
## Chain 4 Iteration: 3200 / 4000 [ 80%] (Sampling)
## Chain 4 Iteration: 3300 / 4000 [ 82%] (Sampling)
## Chain 1 Iteration: 3500 / 4000 [ 87%] (Sampling)
## Chain 1 Iteration: 3600 / 4000 [ 90%] (Sampling)
## Chain 2 Iteration: 3900 / 4000 [ 97%] (Sampling)
## Chain 2 Iteration: 4000 / 4000 [100%] (Sampling)
## Chain 3 Iteration: 3400 / 4000 [ 85%] (Sampling)
## Chain 3 Iteration: 3500 / 4000 [ 87%] (Sampling)
## Chain 4 Iteration: 3400 / 4000 [ 85%] (Sampling)
## Chain 4 Iteration: 3500 / 4000 [ 87%] (Sampling)
## Chain 2 finished in 2.0 seconds.
## Chain 1 Iteration: 3700 / 4000 [ 92%] (Sampling)
## Chain 1 Iteration: 3800 / 4000 [ 95%] (Sampling)
## Chain 3 Iteration: 3600 / 4000 [ 90%] (Sampling)
## Chain 3 Iteration: 3700 / 4000 [ 92%] (Sampling)
## Chain 4 Iteration: 3600 / 4000 [ 90%] (Sampling)
## Chain 4 Iteration: 3700 / 4000 [ 92%] (Sampling)
## Chain 1 Iteration: 3900 / 4000 [ 97%] (Sampling)
## Chain 1 Iteration: 4000 / 4000 [100%] (Sampling)
## Chain 3 Iteration: 3800 / 4000 [ 95%] (Sampling)
## Chain 4 Iteration: 3800 / 4000 [ 95%] (Sampling)
## Chain 4 Iteration: 3900 / 4000 [ 97%] (Sampling)
## Chain 1 finished in 2.2 seconds.
## Chain 3 Iteration: 3900 / 4000 [ 97%] (Sampling)
## Chain 3 Iteration: 4000 / 4000 [100%] (Sampling)
## Chain 4 Iteration: 4000 / 4000 [100%] (Sampling)
## Chain 3 finished in 2.3 seconds.
## Chain 4 finished in 2.3 seconds.
##
## All 4 chains finished successfully.
## Mean chain execution time: 2.2 seconds.
## Total execution time: 2.4 seconds.
summary(model.4)
## Family: negbinomial
## Links: mu = log; shape = identity
## Formula: pumps ~ 1 + condition
## Data: pose_raw_df (Number of observations: 2400)
## Draws: 4 chains, each with iter = 4000; warmup = 1000; thin = 1;
## total post-warmup draws = 12000
##
## Population-Level Effects:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## Intercept 3.64 0.01 3.61 3.66 1.00 11789 9145
## conditionexpansive -0.01 0.02 -0.05 0.03 1.00 10845 8766
##
## Family Specific Parameters:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## shape 4.59 0.15 4.32 4.89 1.00 10138 8289
##
## Draws were sampled using sample(hmc). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
pose_raw_df.bayesian_poisson.y <- pose_raw_df$pumps
pose_raw_df.bayesian_poisson.yrep <- posterior_predict(model.4, ndraws = 30, seed = 1234)
ppc_dens_overlay(y = pose_raw_df.bayesian_poisson.y,
yrep = pose_raw_df.bayesian_poisson.yrep)
The results of this model are on the log-odds scale. What do the coefficients mean? The simplest way is to simply transform the data into a more interpretable scale and visualise the results:
p.model.4 = pose_raw_df %>%
ggplot() +
geom_point(aes(x = pumps, y = condition, colour = condition),
position = position_jitter(height = 0.1), alpha = 0.7) +
scale_color_theme() +
labs(y = "Condition") +
theme(
legend.position = "none",
axis.line.y = element_blank(),
axis.ticks.y = element_blank(),
axis.title.y = element_blank()
) +
scale_x_continuous(limits = c(0, 130), breaks = seq(0, 150, by = 30))
draws.model.4 = plot_predictions(model.4, df = crossing(condition = c('expansive', 'constrictive'),
trial = 0:29,
participant = 0)) +
scale_x_continuous(breaks = seq(0, 150, by = 30)) +
coord_cartesian(xlim = c(0, 150))
wrap_plots(
draws.model.4,
p.model.4,
nrow = 2)
We generate the average of 30 trials using an average participant.
model.4.posteriors <- model.4 %>%
epred_draws(crossing(condition = c('expansive', 'constrictive'),
trial = 0:29),
re_formula = NA,
allow_new_levels = FALSE) %>%
mutate(model = 'model 4') %>%
group_by(.draw, condition) %>%
summarise(.epred = mean(.epred))
model.4.posteriors %>%
ungroup() %>%
compare_levels(.epred, by = condition) %>%
median_qi(.width = .95) %>%
ggplot() +
geom_pointinterval(aes(x = .epred, y = condition, xmin = .lower, xmax = .upper)) +
geom_vline(xintercept = 0, linetype = 2, color = "#979797") +
scale_x_continuous(breaks = seq(-10, 10, by = 2)) +
coord_cartesian(xlim = c(-10, 10)) +
xlab("Difference in Mean") +
theme(
axis.text.x = element_text(size = 14),
axis.text.y = element_text(size = 14),
axis.title.x = element_text(size = 16),
axis.title.y = element_blank()
)
Plot out the posteriors for mean.
model.4.posteriors %>%
ggplot(aes(x = .epred, fill = condition)) +
stat_halfeye(.width = .95) +
scale_color_theme() +
facet_wrap(condition ~ ., ncol = 1) +
theme_density_x +
scale_x_continuous(limits = c(30, 50), breaks = seq(0, 60, by = 10)) +
ggtitle(paste0('Posterior means of ', deparse(substitute(model))))
From this, we can see that there does not appear to be a difference between the two conditions.
Let’s use model 4
We will need the ESSs and RHat from this print. Also, if you want to
report the standard deviation of random intercepts or CIs for other
paramaters. You can find them via summary(..)
summary(model.4)
## Family: negbinomial
## Links: mu = log; shape = identity
## Formula: pumps ~ 1 + condition
## Data: pose_raw_df (Number of observations: 2400)
## Draws: 4 chains, each with iter = 4000; warmup = 1000; thin = 1;
## total post-warmup draws = 12000
##
## Population-Level Effects:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## Intercept 3.64 0.01 3.61 3.66 1.00 11789 9145
## conditionexpansive -0.01 0.02 -0.05 0.03 1.00 10845 8766
##
## Family Specific Parameters:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## shape 4.59 0.15 4.32 4.89 1.00 10138 8289
##
## Draws were sampled using sample(hmc). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
We compute the credible intervals (CIs; Bayesian analogy to confidence intervals) for the two conditions.
model.4.posteriors.CI <-
model.4.posteriors %>%
group_by(condition) %>%
median_qi(.epred, width = .95)
model.4.posteriors.CI
| condition | .epred | .epred.lower | .epred.upper | width | width.lower | width.upper | .width | .point | .interval |
|---|---|---|---|---|---|---|---|---|---|
| constrictive | 37.94395 | 36.92430 | 39.03394 | 0.95 | 0.95 | 0.95 | 0.95 | median | qi |
| expansive | 37.67453 | 36.64203 | 38.74556 | 0.95 | 0.95 | 0.95 | 0.95 | median | qi |
model.4.posteriors %>%
ggplot() +
geom_density(aes(x = .epred, fill = condition), alpha = .5, color = NA) +
geom_point(model.4.posteriors.CI,
mapping = aes(x = .epred, y = 0), size = 3) +
geom_errorbarh(model.4.posteriors.CI,
mapping = aes(x = .epred, xmin = .epred.lower, xmax = .epred.upper, y = 0), height = 0, linewidth = 1.5) +
facet_wrap(condition ~ ., ncol = 1) +
scale_x_continuous(limits = c(0, 80)) +
scale_y_continuous(expand = c(.02,.02)) +
scale_color_theme() +
theme_density_x
model.4.posteriors_diff <-
model.4.posteriors %>%
ungroup() %>%
compare_levels(variable = .epred, by = condition) %>%
ungroup()
head(model.4.posteriors_diff)
| .draw | condition | .epred |
|---|---|---|
| 1 | expansive - constrictive | 1.8014050 |
| 2 | expansive - constrictive | 0.4099562 |
| 3 | expansive - constrictive | -0.1681858 |
| 4 | expansive - constrictive | 1.5619406 |
| 5 | expansive - constrictive | 0.0938069 |
| 6 | expansive - constrictive | -0.3691244 |
model.4.posteriors_diff.CI <-
model.4.posteriors_diff %>%
median_qi(.epred)
model.4.posteriors_diff.CI
| .epred | .lower | .upper | .width | .point | .interval |
|---|---|---|---|---|---|
| -0.2769801 | -1.778706 | 1.204555 | 0.95 | median | qi |
model.4.posteriors_diff %>%
ggplot() +
geom_density(aes(x = .epred), alpha = .5, fill = 'skyblue', color = NA, adjust = 2) +
geom_point(model.4.posteriors_diff.CI,
mapping = aes(x = .epred, y = 0), size = 3) +
geom_errorbarh(model.4.posteriors_diff.CI,
mapping = aes(x = .epred, xmin = .lower, xmax = .upper, y = 0), height = 0, linewidth = 1.5) +
#scale_x_continuous(limits = c(-50, 50)) +
xlab('expansive - constrictive') +
ggtitle('Mean difference in expansive and constrictive') +
geom_vline(xintercept = 0, linetype = 2) +
scale_y_continuous(expand = c(.02,.02)) +
scale_x_continuous(limits = c(-10, 10), breaks = seq(-10, 10, by = 5)) +
scale_color_theme() +
theme_density_x
\[ \begin{align} y_{i} &\sim \mathrm{Neg-Binomial}(\mu, \phi) \\ log(\mu) &= \beta_{0,k} + \beta_{1} . x_i + \beta_j . x_j \\ \beta_{0, j} &\sim \mathrm{Normal}(\alpha, \sigma) \\ \alpha &\sim \mathrm{Normal}(4.1, 0.5) \\ \sigma &\sim \mathrm{Exponential}(2) \\ \beta_{0, j} &\sim \mathrm{Student\_t}(3, 0, 1) \\ \beta_{1} &\sim \mathrm{Normal}(0, 0.5) \\ \phi &\sim \mathrm{Gamma}(5, 1) \\ i & \in \{\mathrm{expansive}, \mathrm{constrictive}\}\\ j & \in \{1, ..., \mathrm{N}\} \end{align} \]
model.5 = brm(pumps ~ 1 + condition + trial + (1|participant),
data = pose_raw_df, family = negbinomial(),
prior = c(prior(normal(4.1, 0.5), class = Intercept),
prior(normal(0, 0.5), class = b),
prior(exponential(2), class = sd),
prior(gamma(5, 1), class = shape)
),
backend = BRM_BACKEND,
file = 'rds/model.4.rds',
file_refit = 'on_change' ,
iter = 4000, warmup = 1000, cores = 4, chains = 4)
## Running MCMC with 4 parallel chains...
##
## Chain 1 Iteration: 1 / 4000 [ 0%] (Warmup)
## Chain 2 Iteration: 1 / 4000 [ 0%] (Warmup)
## Chain 3 Iteration: 1 / 4000 [ 0%] (Warmup)
## Chain 4 Iteration: 1 / 4000 [ 0%] (Warmup)
## Chain 2 Iteration: 100 / 4000 [ 2%] (Warmup)
## Chain 3 Iteration: 100 / 4000 [ 2%] (Warmup)
## Chain 4 Iteration: 100 / 4000 [ 2%] (Warmup)
## Chain 1 Iteration: 100 / 4000 [ 2%] (Warmup)
## Chain 2 Iteration: 200 / 4000 [ 5%] (Warmup)
## Chain 2 Iteration: 300 / 4000 [ 7%] (Warmup)
## Chain 3 Iteration: 200 / 4000 [ 5%] (Warmup)
## Chain 4 Iteration: 200 / 4000 [ 5%] (Warmup)
## Chain 1 Iteration: 200 / 4000 [ 5%] (Warmup)
## Chain 2 Iteration: 400 / 4000 [ 10%] (Warmup)
## Chain 3 Iteration: 300 / 4000 [ 7%] (Warmup)
## Chain 4 Iteration: 300 / 4000 [ 7%] (Warmup)
## Chain 2 Iteration: 500 / 4000 [ 12%] (Warmup)
## Chain 1 Iteration: 300 / 4000 [ 7%] (Warmup)
## Chain 3 Iteration: 400 / 4000 [ 10%] (Warmup)
## Chain 4 Iteration: 400 / 4000 [ 10%] (Warmup)
## Chain 2 Iteration: 600 / 4000 [ 15%] (Warmup)
## Chain 3 Iteration: 500 / 4000 [ 12%] (Warmup)
## Chain 1 Iteration: 400 / 4000 [ 10%] (Warmup)
## Chain 2 Iteration: 700 / 4000 [ 17%] (Warmup)
## Chain 4 Iteration: 500 / 4000 [ 12%] (Warmup)
## Chain 2 Iteration: 800 / 4000 [ 20%] (Warmup)
## Chain 3 Iteration: 600 / 4000 [ 15%] (Warmup)
## Chain 4 Iteration: 600 / 4000 [ 15%] (Warmup)
## Chain 1 Iteration: 500 / 4000 [ 12%] (Warmup)
## Chain 2 Iteration: 900 / 4000 [ 22%] (Warmup)
## Chain 3 Iteration: 700 / 4000 [ 17%] (Warmup)
## Chain 4 Iteration: 700 / 4000 [ 17%] (Warmup)
## Chain 1 Iteration: 600 / 4000 [ 15%] (Warmup)
## Chain 3 Iteration: 800 / 4000 [ 20%] (Warmup)
## Chain 2 Iteration: 1000 / 4000 [ 25%] (Warmup)
## Chain 2 Iteration: 1001 / 4000 [ 25%] (Sampling)
## Chain 4 Iteration: 800 / 4000 [ 20%] (Warmup)
## Chain 1 Iteration: 700 / 4000 [ 17%] (Warmup)
## Chain 2 Iteration: 1100 / 4000 [ 27%] (Sampling)
## Chain 3 Iteration: 900 / 4000 [ 22%] (Warmup)
## Chain 4 Iteration: 900 / 4000 [ 22%] (Warmup)
## Chain 1 Iteration: 800 / 4000 [ 20%] (Warmup)
## Chain 2 Iteration: 1200 / 4000 [ 30%] (Sampling)
## Chain 3 Iteration: 1000 / 4000 [ 25%] (Warmup)
## Chain 3 Iteration: 1001 / 4000 [ 25%] (Sampling)
## Chain 4 Iteration: 1000 / 4000 [ 25%] (Warmup)
## Chain 4 Iteration: 1001 / 4000 [ 25%] (Sampling)
## Chain 1 Iteration: 900 / 4000 [ 22%] (Warmup)
## Chain 2 Iteration: 1300 / 4000 [ 32%] (Sampling)
## Chain 3 Iteration: 1100 / 4000 [ 27%] (Sampling)
## Chain 4 Iteration: 1100 / 4000 [ 27%] (Sampling)
## Chain 1 Iteration: 1000 / 4000 [ 25%] (Warmup)
## Chain 1 Iteration: 1001 / 4000 [ 25%] (Sampling)
## Chain 2 Iteration: 1400 / 4000 [ 35%] (Sampling)
## Chain 3 Iteration: 1200 / 4000 [ 30%] (Sampling)
## Chain 4 Iteration: 1200 / 4000 [ 30%] (Sampling)
## Chain 2 Iteration: 1500 / 4000 [ 37%] (Sampling)
## Chain 3 Iteration: 1300 / 4000 [ 32%] (Sampling)
## Chain 1 Iteration: 1100 / 4000 [ 27%] (Sampling)
## Chain 4 Iteration: 1300 / 4000 [ 32%] (Sampling)
## Chain 2 Iteration: 1600 / 4000 [ 40%] (Sampling)
## Chain 3 Iteration: 1400 / 4000 [ 35%] (Sampling)
## Chain 1 Iteration: 1200 / 4000 [ 30%] (Sampling)
## Chain 4 Iteration: 1400 / 4000 [ 35%] (Sampling)
## Chain 2 Iteration: 1700 / 4000 [ 42%] (Sampling)
## Chain 3 Iteration: 1500 / 4000 [ 37%] (Sampling)
## Chain 4 Iteration: 1500 / 4000 [ 37%] (Sampling)
## Chain 1 Iteration: 1300 / 4000 [ 32%] (Sampling)
## Chain 2 Iteration: 1800 / 4000 [ 45%] (Sampling)
## Chain 3 Iteration: 1600 / 4000 [ 40%] (Sampling)
## Chain 1 Iteration: 1400 / 4000 [ 35%] (Sampling)
## Chain 4 Iteration: 1600 / 4000 [ 40%] (Sampling)
## Chain 2 Iteration: 1900 / 4000 [ 47%] (Sampling)
## Chain 3 Iteration: 1700 / 4000 [ 42%] (Sampling)
## Chain 4 Iteration: 1700 / 4000 [ 42%] (Sampling)
## Chain 1 Iteration: 1500 / 4000 [ 37%] (Sampling)
## Chain 2 Iteration: 2000 / 4000 [ 50%] (Sampling)
## Chain 3 Iteration: 1800 / 4000 [ 45%] (Sampling)
## Chain 4 Iteration: 1800 / 4000 [ 45%] (Sampling)
## Chain 1 Iteration: 1600 / 4000 [ 40%] (Sampling)
## Chain 2 Iteration: 2100 / 4000 [ 52%] (Sampling)
## Chain 3 Iteration: 1900 / 4000 [ 47%] (Sampling)
## Chain 4 Iteration: 1900 / 4000 [ 47%] (Sampling)
## Chain 1 Iteration: 1700 / 4000 [ 42%] (Sampling)
## Chain 2 Iteration: 2200 / 4000 [ 55%] (Sampling)
## Chain 3 Iteration: 2000 / 4000 [ 50%] (Sampling)
## Chain 4 Iteration: 2000 / 4000 [ 50%] (Sampling)
## Chain 1 Iteration: 1800 / 4000 [ 45%] (Sampling)
## Chain 2 Iteration: 2300 / 4000 [ 57%] (Sampling)
## Chain 3 Iteration: 2100 / 4000 [ 52%] (Sampling)
## Chain 4 Iteration: 2100 / 4000 [ 52%] (Sampling)
## Chain 2 Iteration: 2400 / 4000 [ 60%] (Sampling)
## Chain 3 Iteration: 2200 / 4000 [ 55%] (Sampling)
## Chain 1 Iteration: 1900 / 4000 [ 47%] (Sampling)
## Chain 4 Iteration: 2200 / 4000 [ 55%] (Sampling)
## Chain 2 Iteration: 2500 / 4000 [ 62%] (Sampling)
## Chain 3 Iteration: 2300 / 4000 [ 57%] (Sampling)
## Chain 1 Iteration: 2000 / 4000 [ 50%] (Sampling)
## Chain 4 Iteration: 2300 / 4000 [ 57%] (Sampling)
## Chain 2 Iteration: 2600 / 4000 [ 65%] (Sampling)
## Chain 3 Iteration: 2400 / 4000 [ 60%] (Sampling)
## Chain 1 Iteration: 2100 / 4000 [ 52%] (Sampling)
## Chain 4 Iteration: 2400 / 4000 [ 60%] (Sampling)
## Chain 2 Iteration: 2700 / 4000 [ 67%] (Sampling)
## Chain 3 Iteration: 2500 / 4000 [ 62%] (Sampling)
## Chain 1 Iteration: 2200 / 4000 [ 55%] (Sampling)
## Chain 4 Iteration: 2500 / 4000 [ 62%] (Sampling)
## Chain 2 Iteration: 2800 / 4000 [ 70%] (Sampling)
## Chain 3 Iteration: 2600 / 4000 [ 65%] (Sampling)
## Chain 4 Iteration: 2600 / 4000 [ 65%] (Sampling)
## Chain 1 Iteration: 2300 / 4000 [ 57%] (Sampling)
## Chain 2 Iteration: 2900 / 4000 [ 72%] (Sampling)
## Chain 3 Iteration: 2700 / 4000 [ 67%] (Sampling)
## Chain 2 Iteration: 3000 / 4000 [ 75%] (Sampling)
## Chain 4 Iteration: 2700 / 4000 [ 67%] (Sampling)
## Chain 1 Iteration: 2400 / 4000 [ 60%] (Sampling)
## Chain 3 Iteration: 2800 / 4000 [ 70%] (Sampling)
## Chain 4 Iteration: 2800 / 4000 [ 70%] (Sampling)
## Chain 1 Iteration: 2500 / 4000 [ 62%] (Sampling)
## Chain 2 Iteration: 3100 / 4000 [ 77%] (Sampling)
## Chain 3 Iteration: 2900 / 4000 [ 72%] (Sampling)
## Chain 4 Iteration: 2900 / 4000 [ 72%] (Sampling)
## Chain 2 Iteration: 3200 / 4000 [ 80%] (Sampling)
## Chain 3 Iteration: 3000 / 4000 [ 75%] (Sampling)
## Chain 1 Iteration: 2600 / 4000 [ 65%] (Sampling)
## Chain 4 Iteration: 3000 / 4000 [ 75%] (Sampling)
## Chain 2 Iteration: 3300 / 4000 [ 82%] (Sampling)
## Chain 3 Iteration: 3100 / 4000 [ 77%] (Sampling)
## Chain 1 Iteration: 2700 / 4000 [ 67%] (Sampling)
## Chain 2 Iteration: 3400 / 4000 [ 85%] (Sampling)
## Chain 3 Iteration: 3200 / 4000 [ 80%] (Sampling)
## Chain 4 Iteration: 3100 / 4000 [ 77%] (Sampling)
## Chain 1 Iteration: 2800 / 4000 [ 70%] (Sampling)
## Chain 4 Iteration: 3200 / 4000 [ 80%] (Sampling)
## Chain 2 Iteration: 3500 / 4000 [ 87%] (Sampling)
## Chain 3 Iteration: 3300 / 4000 [ 82%] (Sampling)
## Chain 1 Iteration: 2900 / 4000 [ 72%] (Sampling)
## Chain 4 Iteration: 3300 / 4000 [ 82%] (Sampling)
## Chain 2 Iteration: 3600 / 4000 [ 90%] (Sampling)
## Chain 3 Iteration: 3400 / 4000 [ 85%] (Sampling)
## Chain 4 Iteration: 3400 / 4000 [ 85%] (Sampling)
## Chain 1 Iteration: 3000 / 4000 [ 75%] (Sampling)
## Chain 2 Iteration: 3700 / 4000 [ 92%] (Sampling)
## Chain 3 Iteration: 3500 / 4000 [ 87%] (Sampling)
## Chain 1 Iteration: 3100 / 4000 [ 77%] (Sampling)
## Chain 2 Iteration: 3800 / 4000 [ 95%] (Sampling)
## Chain 3 Iteration: 3600 / 4000 [ 90%] (Sampling)
## Chain 4 Iteration: 3500 / 4000 [ 87%] (Sampling)
## Chain 2 Iteration: 3900 / 4000 [ 97%] (Sampling)
## Chain 4 Iteration: 3600 / 4000 [ 90%] (Sampling)
## Chain 1 Iteration: 3200 / 4000 [ 80%] (Sampling)
## Chain 3 Iteration: 3700 / 4000 [ 92%] (Sampling)
## Chain 4 Iteration: 3700 / 4000 [ 92%] (Sampling)
## Chain 1 Iteration: 3300 / 4000 [ 82%] (Sampling)
## Chain 2 Iteration: 4000 / 4000 [100%] (Sampling)
## Chain 3 Iteration: 3800 / 4000 [ 95%] (Sampling)
## Chain 2 finished in 18.6 seconds.
## Chain 3 Iteration: 3900 / 4000 [ 97%] (Sampling)
## Chain 4 Iteration: 3800 / 4000 [ 95%] (Sampling)
## Chain 1 Iteration: 3400 / 4000 [ 85%] (Sampling)
## Chain 3 Iteration: 4000 / 4000 [100%] (Sampling)
## Chain 4 Iteration: 3900 / 4000 [ 97%] (Sampling)
## Chain 3 finished in 19.1 seconds.
## Chain 1 Iteration: 3500 / 4000 [ 87%] (Sampling)
## Chain 4 Iteration: 4000 / 4000 [100%] (Sampling)
## Chain 4 finished in 19.2 seconds.
## Chain 1 Iteration: 3600 / 4000 [ 90%] (Sampling)
## Chain 1 Iteration: 3700 / 4000 [ 92%] (Sampling)
## Chain 1 Iteration: 3800 / 4000 [ 95%] (Sampling)
## Chain 1 Iteration: 3900 / 4000 [ 97%] (Sampling)
## Chain 1 Iteration: 4000 / 4000 [100%] (Sampling)
## Chain 1 finished in 20.4 seconds.
##
## All 4 chains finished successfully.
## Mean chain execution time: 19.3 seconds.
## Total execution time: 20.4 seconds.
summary(model.5)
## Family: negbinomial
## Links: mu = log; shape = identity
## Formula: pumps ~ 1 + condition + trial + (1 | participant)
## Data: pose_raw_df (Number of observations: 2400)
## Draws: 4 chains, each with iter = 4000; warmup = 1000; thin = 1;
## total post-warmup draws = 12000
##
## Group-Level Effects:
## ~participant (Number of levels: 80)
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept) 0.31 0.03 0.27 0.37 1.00 1396 2864
##
## Population-Level Effects:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## Intercept 3.52 0.05 3.42 3.62 1.00 855 1978
## conditionexpansive -0.02 0.07 -0.16 0.12 1.00 760 1717
## trial 0.01 0.00 0.00 0.01 1.00 11968 7407
##
## Family Specific Parameters:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## shape 7.42 0.25 6.93 7.94 1.00 12184 9233
##
## Draws were sampled using sample(hmc). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
pose_raw_df.bayesian_poisson.y <- pose_raw_df$pumps
pose_raw_df.bayesian_poisson.yrep <- posterior_predict(model.5, ndraws = 30, seed = 1234)
ppc_dens_overlay(y = pose_raw_df.bayesian_poisson.y,
yrep = pose_raw_df.bayesian_poisson.yrep)
summary(model.5)
## Family: negbinomial
## Links: mu = log; shape = identity
## Formula: pumps ~ 1 + condition + trial + (1 | participant)
## Data: pose_raw_df (Number of observations: 2400)
## Draws: 4 chains, each with iter = 4000; warmup = 1000; thin = 1;
## total post-warmup draws = 12000
##
## Group-Level Effects:
## ~participant (Number of levels: 80)
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept) 0.31 0.03 0.27 0.37 1.00 1396 2864
##
## Population-Level Effects:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## Intercept 3.52 0.05 3.42 3.62 1.00 855 1978
## conditionexpansive -0.02 0.07 -0.16 0.12 1.00 760 1717
## trial 0.01 0.00 0.00 0.01 1.00 11968 7407
##
## Family Specific Parameters:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## shape 7.42 0.25 6.93 7.94 1.00 12184 9233
##
## Draws were sampled using sample(hmc). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
The results of this model are on the log-odds scale. What do the coefficients mean? The simplest way is to simply transform the data into a more interpretable scale and visualise the results:
p.model.5 = pose_raw_df %>%
ggplot() +
geom_point(aes(x = pumps, y = condition, colour = condition),
position = position_jitter(height = 0.1), alpha = 0.7) +
scale_color_theme() +
labs(y = "Condition") +
theme(
legend.position = "none",
axis.line.y = element_blank(),
axis.ticks.y = element_blank(),
axis.title.y = element_blank()
) +
scale_x_continuous(limits = c(0, 130), breaks = seq(0, 150, by = 30))
draws.model.5 = plot_predictions(model.5, df = crossing(condition = c('expansive', 'constrictive'),
trial = 0:29,
participant = 0)) +
scale_x_continuous(breaks = seq(0, 150, by = 30)) +
coord_cartesian(xlim = c(0, 150))
wrap_plots(
draws.model.5,
p.model.5,
nrow = 2)
We generate the average of 30 trials using an average participant.
model.5.posteriors <- model.5 %>%
epred_draws(crossing(condition = c('expansive', 'constrictive'),
trial = 0:29),
re_formula = NA,
allow_new_levels = FALSE) %>%
mutate(model = 'model 5') %>%
group_by(.draw, condition) %>%
summarise(.epred = mean(.epred))
model.5.posteriors %>%
ungroup() %>%
compare_levels(.epred, by = condition) %>%
median_qi(.width = .95) %>%
ggplot() +
geom_pointinterval(aes(x = .epred, y = condition, xmin = .lower, xmax = .upper)) +
geom_vline(xintercept = 0, linetype = 2, color = "#979797") +
scale_x_continuous(breaks = seq(-10, 10, by = 2)) +
coord_cartesian(xlim = c(-10, 10)) +
xlab("Difference in Mean") +
theme(
axis.text.x = element_text(size = 14),
axis.text.y = element_text(size = 14),
axis.title.x = element_text(size = 16),
axis.title.y = element_blank()
)
Plot out the posteriors for mean.
model.5.posteriors %>%
ggplot(aes(x = .epred, fill = condition)) +
stat_halfeye(.width = .95) +
scale_color_theme() +
facet_wrap(condition ~ ., ncol = 1) +
theme_density_x +
scale_x_continuous(limits = c(30, 50), breaks = seq(0, 60, by = 10)) +
ggtitle(paste0('Posterior means of ', deparse(substitute(model))))
From this, we can see that there does not appear to be a difference between the two conditions.
sessionInfo()
## R version 4.2.2 (2022-10-31)
## Platform: aarch64-apple-darwin20 (64-bit)
## Running under: macOS Ventura 13.2
##
## Matrix products: default
## BLAS: /Library/Frameworks/R.framework/Versions/4.2-arm64/Resources/lib/libRblas.0.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/4.2-arm64/Resources/lib/libRlapack.dylib
##
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] cmdstanr_0.5.2 knitr_1.39 ggplot2_3.4.0
## [4] bayesplot_1.9.0 ggdist_3.1.1 tidybayes_3.0.2
## [7] brms_2.17.0 Rcpp_1.0.8.3 modelr_0.1.8
## [10] broom.mixed_0.2.9.4 broom_1.0.0 patchwork_1.1.2
## [13] gtools_3.9.2.1 forcats_0.5.1 tidyr_1.2.0
## [16] purrr_0.3.4 tibble_3.1.7 dplyr_1.0.9
##
## loaded via a namespace (and not attached):
## [1] colorspace_2.0-3 ellipsis_0.3.2 ggridges_0.5.3
## [4] markdown_1.1 base64enc_0.1-3 rstudioapi_0.13
## [7] listenv_0.8.0 furrr_0.3.1 farver_2.1.1
## [10] rstan_2.21.5 bit64_4.0.5 svUnit_1.0.6
## [13] DT_0.23 fansi_1.0.3 mvtnorm_1.1-3
## [16] diffobj_0.3.5 bridgesampling_1.1-2 codetools_0.2-18
## [19] splines_4.2.2 shinythemes_1.2.0 jsonlite_1.8.0
## [22] shiny_1.7.1 readr_2.1.2 compiler_4.2.2
## [25] backports_1.4.1 assertthat_0.2.1 Matrix_1.5-1
## [28] fastmap_1.1.0 cli_3.4.1 later_1.3.0
## [31] htmltools_0.5.2 prettyunits_1.1.1 tools_4.2.2
## [34] igraph_1.3.1 coda_0.19-4 gtable_0.3.0
## [37] glue_1.6.2 reshape2_1.4.4 posterior_1.2.1
## [40] jquerylib_0.1.4 vctrs_0.5.1 nlme_3.1-160
## [43] crosstalk_1.2.0 tensorA_0.36.2 xfun_0.31
## [46] stringr_1.4.0 globals_0.15.0 ps_1.7.0
## [49] mime_0.12 miniUI_0.1.1.1 lifecycle_1.0.3
## [52] future_1.26.1 zoo_1.8-10 scales_1.2.0
## [55] vroom_1.5.7 colourpicker_1.1.1 hms_1.1.1
## [58] promises_1.2.0.1 Brobdingnag_1.2-7 parallel_4.2.2
## [61] inline_0.3.19 shinystan_2.6.0 yaml_2.3.5
## [64] gridExtra_2.3 loo_2.5.1 StanHeaders_2.21.0-7
## [67] sass_0.4.1 stringi_1.7.6 highr_0.9
## [70] dygraphs_1.1.1.6 checkmate_2.1.0 pkgbuild_1.3.1
## [73] rlang_1.0.6 pkgconfig_2.0.3 matrixStats_0.62.0
## [76] distributional_0.3.0 evaluate_0.15 lattice_0.20-45
## [79] labeling_0.4.2 rstantools_2.2.0 htmlwidgets_1.5.4
## [82] cowplot_1.1.1 bit_4.0.4 tidyselect_1.1.2
## [85] processx_3.5.3 parallelly_1.31.1 plyr_1.8.7
## [88] magrittr_2.0.3 R6_2.5.1 generics_0.1.3
## [91] DBI_1.1.2 pillar_1.7.0 withr_2.5.0
## [94] xts_0.12.1 abind_1.4-5 crayon_1.5.1
## [97] arrayhelpers_1.1-0 utf8_1.2.2 tzdb_0.3.0
## [100] rmarkdown_2.14 grid_4.2.2 data.table_1.14.2
## [103] callr_3.7.0 threejs_0.3.3 digest_0.6.29
## [106] xtable_1.8-4 httpuv_1.6.5 RcppParallel_5.1.5
## [109] stats4_4.2.2 munsell_0.5.0 bslib_0.3.1
## [112] shinyjs_2.1.0